import json
import os
import fire
import re
'''
Desired output format (LLaVA conversation JSON)
[
  {
    "id": "000000033471",
    "image": "000000033471.jpg",
    "conversations": [
      {
        "from": "human",
        "value": "<image>\nWhat are the colors of the bus in the image?"
      },
      {
        "from": "gpt",
        "value": "The bus in the image is white and red."
      },
      {
        "from": "human",
        "value": "What feature can be seen on the back of the bus?"
      },
      {
        "from": "gpt",
        "value": "The back of the bus features an advertisement."
      },
      {
        "from": "human",
        "value": "Is the bus driving down the street or pulled off to the side?"
      },
      {
        "from": "gpt",
        "value": "The bus is driving down the street, which is crowded with people and other vehicles."
      }
    ]
  },
]
'''

'''
Input format (example record from MiniGPT-4)
{'image_id': 'TaxCocoVizTextCapFilterV3GPTVDetail0627_522418',
 'question': 'describe the image with as many details as possible.',
 'answer': 'This image shows a person with a hairnet and a red shirt standing in front of a large rectangular cake. The cake is white in color and appears to be freshly made, with a pattern of squares on top. The person is holding a serrated knife in their right hand, and it looks like they are about to cut into the cake. Behind them, we can see a door, a stack of gray trays, and a trashcan. The floor is tiled, and the walls are painted beige. There is also a picture hanging on the wall. It seems like the person is in a kitchen or a bakery.'}
'''

def convert_to_llava(input_file, output_file):
    """Convert question/answer annotations into LLaVA conversation JSON.

    The input JSON is either a list of records or a dict that holds that
    list under the ``"annotations"`` key. Each record must provide
    ``image_id`` (a string), ``question`` and ``answer``. Every record
    becomes one single-turn conversation in which the ``<image>``
    placeholder is appended on a new line after the question.

    Args:
        input_file: Path of the annotation JSON file to read.
        output_file: Path the converted JSON list is written to; an
            existing file is overwritten.

    Raises:
        KeyError: If a record lacks ``image_id``, ``question`` or ``answer``.
    """
    # Use a context manager so the handle is closed deterministically, and
    # decode as UTF-8 (the JSON interchange encoding) rather than whatever
    # the platform locale happens to be.
    with open(input_file, "r", encoding="utf-8") as f:
        anno = json.load(f)

    # Only unwrap when the top level is really a mapping; on a bare list
    # ``in`` would be an element-membership test, not a key lookup.
    if isinstance(anno, dict) and "annotations" in anno:
        anno = anno["annotations"]

    target_format = []
    for item_id, item in enumerate(anno):
        image_id = item['image_id']
        # The output "image" field is a file name, so make sure it carries
        # the .jpg extension exactly once.
        if not image_id.endswith(".jpg"):
            image_id += ".jpg"
        target_format.append({
            # The sample id is the record's position in the input list.
            "id": item_id,
            "image": image_id,
            "conversations": [
                {'from': 'human', 'value': f"{item['question']}\n<image>"},
                {'from': 'gpt', 'value': f"{item['answer']}"},
            ],
        })

    print(f'Number of samples: {len(target_format)}')

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(target_format, f, indent=2)


def main(task, **kwargs):
    """Run the module-level callable named ``task`` with ``kwargs``.

    ``task`` is looked up in this module's global namespace, so any
    top-level function defined in the file can be selected by name. A name
    that is not defined raises ``KeyError``. The callable's return value is
    discarded.
    """
    handler = globals()[task]
    handler(**kwargs)


# Script entry point: hand ``main`` to python-fire, which builds the command
# line from its signature -- the first positional argument becomes ``task``
# and ``--name=value`` flags are forwarded as keyword arguments, e.g.
#   python <script>.py convert_to_llava --input_file=in.json --output_file=out.json
if __name__ == "__main__":
    fire.Fire(main)
